Column

Time Series: IMDb Rating of Each Episode

Map: Most-binged TV show in every state in 2018

Column

Wordcloud

Network

Network for selected characters from the Office

Lollipop chart

---
title: "Data Analysis for The Office"
output: 
  flexdashboard::flex_dashboard:
    orientation: columns
    social: menu
    source_code: embed
    vertical_layout: fill
    logo: logo.JPG
---
 

```{r setup, include=FALSE}
library(flexdashboard)
library(schrute)
library(plotly) 
library(rtweet)
library(stringr)
library(tidytext)
library(dplyr)
library(ggplot2)
library(maps)
library(raster)
library(rgdal)
library(classInt)
library(RColorBrewer)
library(crosstalk)
library(tidyr)
library(wordcloud2)
library(wordcloud)
library(stringr)
library(varhandle)
library(igraph)
library(networkD3)
```

Column {data-width=520}
-----------------------------------------------------------------------
### Time Series: IMDb Rating of Each Episode
```{r}
# Load the per-episode IMDb ratings and discard the two columns that the
# time-series panel does not use.
rate <- read.csv("rating.csv")
# Select with a logical mask instead of `-which(...)`: when none of the names
# match, `which()` returns integer(0) and `rate[, -integer(0)]` would drop
# every column. `drop = FALSE` keeps the result a data frame in all cases.
rate <- rate[
  ,
  !(names(rate) %in% c("Description", "Num_Votes")),
  drop = FALSE
]
```

```{r}
# Episode slots 1..26 as a factor with levels "1".."26".
# (To inspect the episode numbers present in the data: unique(rate$Episode))
eps <- factor(seq_len(26))
```

```{r}
# Rating vector for each of the nine seasons, in file order. `which()`
# skips rows whose Season is NA, the same rows subset() would leave out.
rate1 <- rate$Rating[which(rate$Season == 1)]
rate2 <- rate$Rating[which(rate$Season == 2)]
rate3 <- rate$Rating[which(rate$Season == 3)]
rate4 <- rate$Rating[which(rate$Season == 4)]
rate5 <- rate$Rating[which(rate$Season == 5)]
rate6 <- rate$Rating[which(rate$Season == 6)]
rate7 <- rate$Rating[which(rate$Season == 7)]
rate8 <- rate$Rating[which(rate$Season == 8)]
rate9 <- rate$Rating[which(rate$Season == 9)]
```

```{r}
# Wide table: one row per episode slot, one rating column per season.
# data.frame() errors unless the column lengths are compatible, so each
# season vector has to line up with `eps`.
lines <- data.frame(
  eps = eps,
  rate1 = rate1,
  rate2 = rate2,
  rate3 = rate3,
  rate4 = rate4,
  rate5 = rate5,
  rate6 = rate6,
  rate7 = rate7,
  rate8 = rate8,
  rate9 = rate9
)
```

```{r}
# Line chart of IMDb rating by episode, one trace per season. Seasons 3 and 8
# are highlighted in red and blue; every other season is translucent grey.
season_colors <- rep("rgba(192,192,192,0.4)", 9)
season_colors[3] <- "rgb(205, 12, 24)"
season_colors[8] <- "rgb(22, 96, 167)"

# Stroke width of every season line. This used to be passed as
# `plot_ly(width = 1.5)`, but that argument is the widget width in pixels,
# not a line width, so it now lives in each trace's `line` list.
line_width <- 1.5

# `hoverinfo` accepts combinations of x/y/z/text/name, or all/none/skip; the
# previous value "Season" is none of those. "all" (plotly's default) shows
# the episode, the rating and the season name on hover.
fig <- plot_ly(
  lines,
  x = ~eps, y = ~rate1, name = "Season 1",
  type = "scatter", mode = "lines", hoverinfo = "all",
  line = list(color = season_colors[1], width = line_width)
)

# Seasons 2-9 reuse the x axis, trace type and hover settings of the first
# trace; only the rating column, legend name and colour change.
for (season in 2:9) {
  fig <- fig %>% add_trace(
    y = stats::as.formula(paste0("~rate", season)),
    name = paste("Season", season),
    line = list(color = season_colors[season], width = line_width)
  )
}

fig <- fig %>% layout(
  title = "IMDb Rating of Each Episode",
  xaxis = list(title = "Episode"),
  yaxis = list(title = "IMDb Rating")
)

fig
```

### Map: Most-binged TV show in every state in 2018
```{r}
# Outline data for the "state" map database (provided by the maps package),
# converted to a data frame by ggplot2.
library(maps)
us_states <- ggplot2::map_data(map = "state")
```

```{r}
# "UTF-8-BOM" strips a leading byte-order mark while reading, so the first
# column arrives as `state` instead of a locale-dependent mangled name such
# as `ï..state`.
show <- read.csv("show.csv", fileEncoding = "UTF-8-BOM")
```

```{r}
# Count how many rows (states) list each show and rank the shows by that
# count: rank 1 is the most frequent show, ties share a rank.
show_freq <- data.frame(table(show$pop_show))
names(show_freq)[names(show_freq) == "Var1"] <- "Show"
show_freq <- show_freq %>% mutate(rank = dense_rank(desc(Freq)))
show_freq <- show_freq[order(show_freq$rank), ]

# Attach count and rank to every state row, then order states by show rank.
show <- left_join(show, show_freq, by = c("pop_show" = "Show"))
show <- show[order(show$rank), ]

# Fallback for a header that still carries the mangled BOM prefix.
names(show)[names(show) == "ï..state"] <- "state"
# Fail with a clear message instead of a zero-length assignment error below.
stopifnot("state" %in% names(show))

# Lower-cased state name; used as the join key against the map outlines.
show$region <- tolower(show$state)
```

```{r}
library(raster)
library(rgdal)
library(classInt)
library(RColorBrewer)

# Pair every state row with its outline rows; both tables share the
# lower-cased `region` key.
show_map <- left_join(show, us_states, by = "region")
show_map[["state"]] <- NULL

# Fix the order in which the show categories appear in the legend.
show_map$label <- factor(
  show_map$label,
  levels = c(
    "The Office",
    "13 Reasons Why",
    "Grey's Anatomy",
    "Supernatural",
    "Friends",
    "other"
  )
)
```

```{r}
# Palette assembled from the Set3 and Set1 Brewer schemes. The plot below
# does not reference it; the fill colours are given explicitly instead.
mycolors <- c(
  brewer.pal(name = "Set3", n = 4),
  brewer.pal(name = "Set1", n = 2)
)

# Base layer: state outlines filled by the show category in `label`.
p0 <- ggplot(
  show_map,
  aes(x = long, y = lat, group = group, fill = label)
) +
  theme_bw()

# Thin dark borders, drawn in an Albers projection.
p1 <- p0 +
  geom_polygon(color = "#453d35", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)

# Title, legend without a heading, and one fill colour per factor level.
p2 <- p1 +
  labs(
    title = "The most-binged TV show in every state in 2018",
    fill = NULL
  ) +
  scale_fill_manual(
    values = c(
      "#377EB8", "#8DD3C7", "#BEBADA", "#e78b8b", "#f0b873", "#f9f9d8"
    )
  ) +
  theme(plot.title = element_text(hjust = 0.5))

p2
```

Column {.tabset data-width=400}
-----------------------------------------------------------------------
### Wordcloud

```{r}
# The `theoffice` dataset shipped with the schrute package.
df <- schrute::theoffice
# Keep only the speaker (`character`) and what was said (`text`).
script <- dplyr::select(df, character, text)
```

```{r}
library(crosstalk)
library(dplyr)
library(plotly)
library(tidyr)
```

```{r}
# Break every line of dialogue into single-word tokens, one row per word.
token.script <- tidytext::unnest_tokens(script, word, text)

# Remove the stop words listed in tidytext's lexicon.
stop_words <- tidytext::stop_words
tidy.token.script <- dplyr::anti_join(token.script, stop_words, by = "word")

# Word counts per character, highest count first, as a plain data frame
# with the count column named `freq`.
word_freq <- tidy.token.script %>%
  group_by(character) %>%
  dplyr::count(word, sort = TRUE) %>%
  data.frame()
names(word_freq)[names(word_freq) == "n"] <- "freq"

# Filler words to discard and the characters to keep.
word_list <- c("uh", "ah", "um", "huh", "yeah", "hey", "gonna")
main_char <- c(
  "Michael", "Dwight", "Jim", "Pam", "Andy",
  "Angela", "Kevin", "Erin", "Oscar", "Creed"
)
word_freq <- word_freq[
  !(word_freq$word %in% word_list) & word_freq$character %in% main_char,
]
```

```{r}
library("wordcloud2")
library("wordcloud")

# Lay the word clouds out on a 2 x 3 grid.
par(mfrow = c(2, 3))

# Draw the word cloud for one character in the next free panel.
#
# x: character name; must match a value in word_freq$character.
#
# Reads the global `word_freq` (columns character, word, freq). Words with a
# frequency below 5 are skipped and at most 100 words are drawn. Returns the
# value of title(), invisibly.
top_wc <- function(x) {
  # Zero margins for the cloud; put the caller's margins back on exit.
  # NOTE(review): with a zero top margin the title may be clipped - confirm
  # in the rendered dashboard.
  old_par <- par(mar = rep(0, 4))
  on.exit(par(old_par), add = TRUE)

  char_words <- subset(word_freq, character == x)
  # The former `main = "hi"` argument is gone: wordcloud() has no `main`
  # parameter, so it was forwarded through `...` to the text-drawing calls,
  # where it is not a graphical parameter.
  wordcloud(
    words = char_words$word, freq = char_words$freq, min.freq = 5,
    scale = c(2, 0.5),
    max.words = 100, random.order = FALSE, rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
  title(paste0("Word cloud for ", x))
}

# One word cloud per featured character, drawn in this order.
for (featured in c("Michael", "Dwight", "Jim", "Pam", "Andy", "Creed")) {
  top_wc(featured)
}
```

### Network
```{r}
library(stringr)
# "UTF-8-BOM" strips a leading byte-order mark while reading, so the first
# column arrives as `source` instead of a locale-dependent mangled name such
# as `ï..source`.
nw <- read.csv("network.csv", fileEncoding = "UTF-8-BOM")
# Fallback for a header that still carries the mangled BOM prefix.
names(nw)[names(nw) == "ï..source"] <- "source"
nw$target <- str_to_title(nw$target)

##### Define Edges
edge <- nw
library(varhandle)
# Convert `source` from a factor back to its underlying values.
edge$source <- unfactor(edge$source)

##### Define Nodes
# Every distinct name that appears on either end of an edge, targets first.
node1 <- unique(edge$target)
node2 <- unique(edge$source)
node <- data.frame(name = unique(c(node1, node2)))

# Group 1 = the four lead characters, group 2 = everyone else. `%in%` never
# yields NA, so a missing name falls into group 2.
node$group <- ifelse(
  node$name %in% c("Dwight", "Michael", "Jim", "Pam"),
  1,
  2
)
```

```{r}
library(igraph)
library(networkD3)

# Edge list and node list for the force-directed network.
# "UTF-8-BOM" strips a leading byte-order mark from nw2.csv, so its first
# column arrives as `source` instead of a mangled name such as `ï..source`.
MisLinks <- read.csv("nw2.csv", fileEncoding = "UTF-8-BOM")
MisNodes <- read.csv("node.csv")

MisNodes$group <- as.integer(MisNodes$group)

# Fallback for a header that still carries the mangled BOM prefix.
names(MisLinks)[names(MisLinks) == "ï..source"] <- "source"
# Edge weight divided by 25; this scaled column is what the network plot
# uses as its link value.
MisLinks$value_adj <- MisLinks$value / 25
```

```{r}
# D3 ordinal colour scale handed to forceNetwork() as raw JavaScript.
ColourScale <- 'd3.scaleOrdinal()
            .domain(["main", "other"])
           .range(["#8961ab", "#a3e6f4"]);'

# Build the force-directed graph, put a heading above it, and restyle the
# page once the widget has rendered.
# NOTE(review): `Nodesize` is documented as a column name; 3 presumably
# selects the third column of MisNodes by position - confirm.
# Alternative palette: colourScale = JS("d3.scaleOrdinal(d3.schemeCategory10);")
fn <- forceNetwork(
  Links = MisLinks, Nodes = MisNodes,
  Source = "source", Target = "target", Value = "value_adj",
  NodeID = "name", Group = "group", Nodesize = 3,
  opacity = 0.9, linkDistance = 100, fontSize = 20,
  colourScale = JS(ColourScale)
) %>%
  htmlwidgets::prependContent(
    htmltools::tags$h3("Network for selected characters from the Office")
  ) %>%
  htmlwidgets::onRender(
    'function(el, x) { 
    d3.selectAll(".legend text").style("fill", "white");
    d3.select("body").style("background-color", "#fffeee");
    d3.select("h3").style("justify-content", "center").style("text-align", "center");
  }'
  )

fn
```

### Lollipop chart
```{r}
library(rtweet)
```

```{r}
# Request `num_tweets` tweets matching #TheOffice, with retweets excluded.
num_tweets <- 2000
TheOffice <- search_tweets(
  q = "#TheOffice",
  n = num_tweets,
  include_rts = FALSE
)
```

```{r}
library(stringr)
library(tidytext)
library(dplyr)

# Token separator: any character that is not a letter, digit, '#', '@' or
# apostrophe, plus an apostrophe that is not followed by one of those. This
# keeps hashtags, mentions and contractions together as single tokens.
reg <- "([^A-Za-z\\d#@']|'(?![A-Za-z\\d#@]))"
TheOffice_words <- TheOffice %>%
  dplyr::select(status_id, text) %>%
  # Skip tweets whose text starts with a double quote.
  filter(!str_detect(text, '^"')) %>%
  # Strip t.co links and HTML-escaped ampersands. The entity has to be
  # matched as a whole: removing only "&" leaves "amp;" behind, which the
  # tokenizer then turns into the word "amp".
  mutate(
    text = str_replace_all(text, "https://t.co/[A-Za-z\\d]+|&amp;", "")
  ) %>%
  unnest_tokens(word, text, token = "regex", pattern = reg) %>%
  # Keep tokens that are not stop words and contain at least one letter.
  filter(
    !word %in% stop_words$word,
    str_detect(word, "[a-z]")
  )

# TheOffice_words %>% group_by(word) %>% summarize(n = n()) %>% arrange(desc(n)) %>% top_n(30)
# 
# TheOffice_Characters <- TheOffice_words %>% filter(word =='#michaelscott'| word=='michael' | word=='dwight' | word== 'jim' | word== 'pam')
# TheOffice_Characters %>% group_by(word) %>% summarize(n = n()) %>% arrange(desc(n)) %>% top_n(5)
```

```{r}
# Fixed mention counts for the four characters shown in the chart below.
Characters_Count <- data.frame(
  Name = c("Michael", "Dwight", "Jim", "Pam"),
  Mentions = c(202, 84, 78, 60),
  stringsAsFactors = FALSE
)
```

```{r}
library(ggplot2)

# Horizontal lollipop chart: a stem from 0 to the mention count, a dot at
# the end, and the count printed in white inside the dot.
ggplot(Characters_Count, aes(x = Name, y = Mentions)) +
  geom_segment(
    aes(x = Name, xend = Name, y = 0, yend = Mentions),
    color = "skyblue"
  ) +
  geom_point(color = "blue", size = 7, alpha = 0.6) +
  # The label is mapped here, inside aes(). It used to be passed to ggplot()
  # outside aes(), where it is not a mapping, and geom_text() had to reach
  # back into Characters_Count$Mentions to get the values.
  geom_text(aes(label = Mentions), color = "white", size = 3.5) +
  ggtitle("Popularity of characters from \n2000 random 'the Office-related' tweets") +
  xlab("Character Names") +
  ylab("Number of Mentions") +
  theme_light() +
  # Flip so the character names run down the vertical axis.
  coord_flip() +
  theme(
    panel.grid.major.y = element_blank(),
    panel.border = element_blank(),
    axis.ticks.y = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
```